Here we load the data from the pressure transducers and identify any jumps.
#Load and trim data----
##This is the data from the pressure transducers
raw_cm <- read_csv('data/raw/sensor_2018_2019.csv') %>%
mutate(datetime = round_date(mdy_hm(datetime),'15 mins')) %>%
select(Event, site, datetime, stage, temp)
#Find data jumps----
## Jumps can come from sensors being moved when data was
## downloaded or sensors were disturbed by water.
jumps <- raw_cm %>%
group_by(site) %>%
arrange(site,datetime) %>%
mutate(lag_event = lag(Event), #creates lagged version
download = ifelse((Event - lag_event) >= 1 | is.na(lag_event), 0,1),
session = cumsum(download))
#Make a dataframe of the first and last X of measurements for each session. X is the of measurements to average over.
jump_hunter <- function(df = jumps, x = 10){
last_x <- df %>%
group_by(site,session) %>%
# Get the last X obs per site and session
slice_tail(n = x) %>%
#Take median stage and lasttime recorded
summarize(last = median(stage,na.rm=T),
lasttime = max(datetime))
#Same as above but for the first X slice of data
first_x <- jumps %>%
group_by(site,session) %>%
slice_head(n = x) %>%
summarize(first = median(stage,na.rm=T),
firsttime = min(datetime))
#Join these datasets and find the median stage difference and the time diff. The time diff is critical because with big time gaps the jumps are likely wrong.
plunge <- inner_join(first_x,last_x) %>%
group_by(site) %>%
mutate(lead_first = lead(first,1,last[n()]),
event_jump = last-lead_first,
cume_jump = rev(cumsum(rev(event_jump))),
timediff = lasttime - lead(firsttime,1,lasttime[n()])) %>%
select(site,session,cume_jump,lasttime)
return(plunge)
}
jumped <- jumps %>%
inner_join(jump_hunter(jumps)) %>%
#Varsovia (concrete channel) shouldn't jump so removed jumps here
#Attached to concrete channel in horizontal setting.
mutate(cume_jump = ifelse(site == 'varsovia',0, cume_jump)) %>%
mutate(jumped_stage = stage - cume_jump)
t.xts <- jumped %>%
ungroup() %>%
dplyr::filter(site == 'balas') %>%
dplyr::select(stage,jumped_stage,datetime) %>%
xts(. %>% select(-datetime),order.by=.$datetime)
dygraph(t.xts) %>%
dyOptions(useDataTimezone = T)
t.xts <- jumped %>%
ungroup() %>%
dplyr::filter(site == 'varsovia') %>%
dplyr::select(stage,jumped_stage,datetime) %>%
xts(. %>% select(-datetime),order.by=.$datetime)
dygraph(t.xts) %>%
dyOptions(useDataTimezone = T)
t.xts <- jumped %>%
ungroup() %>%
dplyr::filter(site == 'helado') %>%
dplyr::select(stage,jumped_stage,datetime) %>%
xts(. %>% select(-datetime),order.by=.$datetime)
dygraph(t.xts) %>%
dyOptions(useDataTimezone = T)
t.xts <- jumped %>%
ungroup() %>%
dplyr::filter(site == 'raices') %>%
dplyr::select(stage,jumped_stage,datetime) %>%
xts(. %>% select(-datetime),order.by=.$datetime)
dygraph(t.xts) %>%
dyOptions(useDataTimezone = T)
Next we compare the sensors data and weekly reference measurements (different than the rating curves which we will load later)
#In field observations
staff <- read_csv('data/raw/Staff_TS_27MAY21.csv') %>%
select(1:4) %>%
mutate(datetime = mdy_hm(paste(date,time))) %>%
dplyr::filter(!is.na(datetime)) %>%
mutate(datetime = round_date(datetime,'15 mins'))
##attach to data from transducers
staff_sensor <- staff %>%
inner_join(jumped,by = c('site','datetime')) %>%
mutate(month = month(datetime))
#April onward comparison of staff vs. sensor----
##Our data quality improves after April 2019 so we will be using April 2019-April 2020 as our '1 calendar year'
start_date = mdy_hms('04/01/2019 00:00:00')
staff_sensor_april <- staff_sensor %>%
dplyr::filter(datetime > start_date)
stage_v_sensor<- ggplot(staff_sensor_april,aes(x=staff_stage,y=jumped_stage,color = month)) +
geom_point() +
facet_wrap(~site,scales='free') +
stat_smooth(method = 'lm') +
labs(title= "After April 2019")+
scale_color_viridis_c() +
xlab('Regla measurement') +
ylab('Sensor stage (jumped)')+
theme_few()
# Sensor to staff conversions----
#creates a linear model using transducer data
staff_conversion <- staff_sensor_april %>%
group_by(site) %>%
nest() %>%
mutate(mods = map(data,~lm(staff_stage ~ jumped_stage, data = .x)),
#Model summaries
mod_sum = map(mods,glance),
mod_tidy = map(mods,tidy))
staff_summaries <- staff_conversion %>%
unnest(mod_sum) %>%
select(site,r.squared,p.value)
knitr::kable(staff_summaries)
| site | r.squared | p.value |
|---|---|---|
| balas | 0.9009185 | 0e+00 |
| raices | 0.8597792 | 2e-07 |
| helado | 0.9109396 | 0e+00 |
| varsovia | 0.7295712 | 0e+00 |
The outputs of this models are predictions where jumped stage is now converted to staff stage at the 0.05,0.5, and 0.95 confidence intervals. It will give us a sense of how uncertain our Q estimates are given our uncertainty in our staff relationships.
my.predict <- function(mods,sensor_data){
out <- predict.lm(mods,sensor_data,interval='confidence')
return(tibble(med_staff = out[,1] %>% as.numeric(.),
min_staff = out[,2] %>% as.numeric(.),
max_staff = out[,3] %>% as.numeric(.)))
}
jumped_staff <- jumped %>%
group_by(site) %>%
nest() %>%
rename(sensor_data = data) %>%
inner_join(staff_conversion,by = 'site') %>%
mutate(preds = map2(mods,sensor_data,my.predict)) %>%
select(-mods,-mod_sum,-mod_tidy,-data) %>%
unnest(c(sensor_data,preds))
Next we load the ratings curves. See text for rating curve methods
q <- read_csv('data/raw/rating_curves_2018_2019.csv') %>%
mutate(manual_stage = manual_stage * 100)
rating_curves<- ggplot(q,aes(x=manual_stage,y=q)) +
geom_point() +
facet_wrap(~site, scales = 'free') +
theme_few()
rating_curves
Finally, we model discharge using the jumped staff dataframe and our rating curves. Note: This is the method used to determine daily discharge in Raices, Helado, Balas and Varsovia. The remaining two tributaries (Cacao and Yure) used manning’s equation.
## Likely shapes (assign log to natural sites and linear to channel)
functional_response <- tibble(site = c('balas','helado','raices','varsovia'),
response = c('log','log',
'log','linear'))
#linear model for varsovia and log model for other 3 sites
man_q <- function(df){
if(df$response[1] == 'log'){
mod <- lm(log(q) ~ manual_stage, data = df)
} else{
mod <- lm(q ~ manual_stage, data = df)
}
}
q_mods <- q %>%
inner_join(functional_response) %>%
group_by(site) %>%
nest() %>%
mutate(q_mods = map(data,man_q),
#Model summaries
q_sum = map(q_mods,glance),
q_tidy = map(q_mods,tidy)) %>%
inner_join(functional_response)
q_summaries <- q_mods %>%
unnest(q_sum) %>%
select(site,r.squared,p.value,response)
knitr::kable(q_summaries)
| site | r.squared | p.value | response |
|---|---|---|---|
| balas | 0.8795183 | 0.0017900 | log |
| helado | 0.7896066 | 0.0005879 | log |
| raices | 0.9142658 | 0.0002036 | log |
| varsovia | 0.9795350 | 0.0000000 | linear |
q.predict <- function(mods,sensor_data){
out <- predict.lm(mods,sensor_data,interval='confidence')
return(tibble(med_q = out[,1] %>% as.numeric(.),
min_q = out[,2] %>% as.numeric(.),
max_q = out[,3] %>% as.numeric(.)))
}
q_preds <- jumped_staff %>%
rename(manual_stage = jumped_stage) %>%
group_by(site) %>%
nest() %>%
rename(sensor_data = data) %>%
inner_join(q_mods,by = 'site') %>%
mutate(preds = map2(q_mods,sensor_data,q.predict)) %>%
select(-q_mods,-q_sum,-q_tidy,-data) %>%
unnest(c(sensor_data,preds)) %>%
mutate(if_any(ends_with('_q'), ~ ifelse(response == 'log',exp(.),.))) %>%
dplyr::filter(datetime > mdy_hms('04/01/2019 00:00:00'),
if_any(ends_with('_q'), ~ . < 10)) %>%
mutate(date = as.Date(datetime))
save(q_preds, file = 'data/final/q.RData')